# All-inclusive model: regress sold_price on every other column of data_factor_core
lm_pre_alpha <- lm(sold_price ~ ., data = data_factor_core)
summ(lm_pre_alpha)
# Testing for heteroskedasticity
# a. Graphically (lm diagnostic plots laid out on a 2 x 2 grid)
par(mfrow = c(2, 2))
plot(lm_pre_alpha)
#autoplot(lm_pre_alpha)
# b. Statistically
ols_test_breusch_pagan(lm_pre_alpha) # Breusch-Pagan test
# - Resolving Heteroskedasticity using heteroskedasticity-consistent (HC) variance covariance matrix
# Compare models: plain OLS standard errors vs. HC0 and HC1 robust standard errors.
# For an lm fit the estimator is chosen through `type` alone; `method` is a
# plm-style argument that vcovHC() does not use here, so it is not passed.
stargazer(lm_pre_alpha,
          coeftest(lm_pre_alpha, vcov = vcovHC(lm_pre_alpha, type = "HC0")),
          coeftest(lm_pre_alpha, vcov = vcovHC(lm_pre_alpha, type = "HC1")),
          type = "text")
Note: Advisor suggested not to include interaction terms except for specific testing.
# Age
# Smoothed sold price against age: one shaded band per infection period,
# overlaid with a dashed trend fitted to all observations together.
a <- ggplot(data = data_factor, mapping = aes(x = age, y = sold_price)) +
  geom_smooth(mapping = aes(fill = infections_period)) +
  geom_smooth(colour = "grey32", linetype = "dashed") +
  scale_fill_manual(
    name = "Infection Period",
    values = c(very_low, med),
    labels = c("Pre", "Post")
  ) +
  labs(x = "Age", y = "Price", title = "Age and Price") +
  theme_minimal()
a
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Actual vs. fit
# Model with non-linear addition: the all-inclusive specification plus a
# squared age term, fitted on the same data frame as lm_pre_alpha.
lm_pre_alpha_age <- lm(sold_price ~ . + I(age^2), data = data_factor_core)
# Print the fitted model's summary table
summ(lm_pre_alpha_age)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(66,24327) = 36748.75, p = 0.00
R² = 0.99
Adj. R² = 0.99
Standard errors: OLS
-------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- --------- -------- ------
(Intercept) -12663.15 9510.30 -1.33 0.18
property_typeDUP -1420.31 2871.50 -0.49 0.62
property_typeOTH -2648.78 2053.45 -1.29 0.20
property_typePAT -626.11 929.50 -0.67 0.50
property_typeSGL 1784.81 437.71 4.08 0.00
property_typeTNH 510.26 551.55 0.93 0.35
ac_typenone -83.17 380.75 -0.22 0.83
ac_typenot_central -1707.03 245.87 -6.94 0.00
list_price 0.98 0.00 888.10 0.00
patio1 775.22 126.90 6.11 0.00
school_general1 151.15 161.81 0.93 0.35
photo_count -29.40 7.65 -3.84 0.00
pool1 -91.33 211.57 -0.43 0.67
roof_typeother 1123.91 232.86 4.83 0.00
roof_typeshingle 1815.51 262.56 6.91 0.00
roof_typeslate 404.62 1113.88 0.36 0.72
gas_typenatural 4180.15 8533.24 0.49 0.62
gas_typenone 3729.55 8529.10 0.44 0.66
gas_typepropane -124.79 8729.36 -0.01 0.99
gas_typeunknown 3388.93 8528.19 0.40 0.69
out_building1 -424.03 137.78 -3.08 0.00
area_living -0.82 0.27 -3.01 0.00
land_acres -305.11 154.40 -1.98 0.05
appliances1 850.31 172.71 4.92 0.00
garage1 623.51 127.04 4.91 0.00
property_conditionnew -4181.83 789.37 -5.30 0.00
property_conditionother -425.40 169.02 -2.52 0.01
energy_efficient1 589.01 141.61 4.16 0.00
exterior_typemetal -78.23 402.23 -0.19 0.85
exterior_typeother 35.68 167.52 0.21 0.83
exterior_typevinyl 390.50 185.92 2.10 0.04
exterior_typewood -646.79 262.80 -2.46 0.01
exterior_featurescourtyard 2427.56 1466.46 1.66 0.10
exterior_featuresfence 1028.53 614.35 1.67 0.09
exterior_featuresnone 1539.28 615.57 2.50 0.01
exterior_featuresporch 950.44 629.21 1.51 0.13
exterior_featurestennis_court 536.63 1724.79 0.31 0.76
fireplace1 408.15 131.51 3.10 0.00
foundation_typeslab 1016.20 191.35 5.31 0.00
foundation_typeunspecified -110.61 229.01 -0.48 0.63
area_total -0.15 0.16 -0.97 0.33
beds_total1 -441.27 3175.53 -0.14 0.89
beds_total2 -837.36 3145.00 -0.27 0.79
beds_total3 -195.13 3148.38 -0.06 0.95
beds_total4 639.15 3154.44 0.20 0.84
beds_total5 -183.17 3212.66 -0.06 0.95
bath_full1 2051.07 3355.33 0.61 0.54
bath_full2 2540.17 3355.08 0.76 0.45
bath_full3 2065.05 3363.12 0.61 0.54
bath_full4 -2648.80 3755.25 -0.71 0.48
bath_full6 -5631.95 9199.84 -0.61 0.54
bath_half1 -295.30 166.82 -1.77 0.08
bath_half2 -1640.99 1098.85 -1.49 0.14
bath_half3 1510.31 6029.59 0.25 0.80
bath_half4 8533.97 8532.40 1.00 0.32
bath_half5 -8590.00 4932.13 -1.74 0.08
age -124.18 11.06 -11.23 0.00
dom -7.97 1.08 -7.37 0.00
sold_date 0.17 0.07 2.64 0.01
sewer_typeseptic -185.34 237.12 -0.78 0.43
sewer_typeunspecified 275.03 129.35 2.13 0.03
property_stylenot_mobile 2262.11 353.36 6.40 0.00
subdivision1 396.83 151.52 2.62 0.01
water_typewell 641.52 599.64 1.07 0.28
waterfront1 -1671.69 225.43 -7.42 0.00
bottom25_dom1 2367.79 158.88 14.90 0.00
I(age^2) 1.16 0.14 8.32 0.00
-------------------------------------------------------------------------
# Marginal effects data frames
# Predictions over age from the linear model (1) and the model with age^2 (2)
ggpredict_1 <- ggpredict(lm_pre_alpha, terms = "age")
ggpredict_2 <- ggpredict(lm_pre_alpha_age, terms = "age")
# Plots
# b: smoothed actual prices (grey) with both prediction curves (dashed) on top
b <- ggplot(data = data_factor_core, mapping = aes(x = age)) +
  geom_smooth(
    data = data_factor_core,
    mapping = aes(y = sold_price),
    colour = "grey50"
  ) +
  geom_smooth(
    data = ggpredict_1,
    mapping = aes(x = x, y = predicted),
    colour = very_low,
    linetype = "dashed"
  ) +
  geom_smooth(
    data = ggpredict_2,
    mapping = aes(x = x, y = predicted),
    colour = med,
    linetype = "dashed"
  ) +
  labs(x = "Age", y = "Prediction", title = "Age and Price")
# c: the two prediction curves alone (age vs. age + age^2), so the y-axis
# spans only the predictions
c <- ggplot() +
  geom_smooth(
    data = ggpredict_1,
    mapping = aes(x = x, y = predicted),
    colour = very_low,
    linetype = "dashed"
  ) +
  geom_smooth(
    data = ggpredict_2,
    mapping = aes(x = x, y = predicted),
    colour = med,
    linetype = "dashed"
  ) +
  labs(x = "Age", y = "Prediction", title = "Age and Price")
# Re-display the descriptive age plot built earlier
a
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Stack the actual-vs-fit panel (b) above the predictions-only panel (c)
gridExtra::grid.arrange(b,c, nrow =2, ncol = 1)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Living Area
# General graphing
# Smoothed sold price against living area: one shaded band per infection
# period, overlaid with a dashed trend fitted to all observations together.
a <- ggplot(data = data_factor, mapping = aes(x = area_living, y = sold_price)) +
  geom_smooth(mapping = aes(fill = infections_period)) +
  geom_smooth(colour = "grey32", linetype = "dashed") +
  scale_fill_manual(
    name = "Infection Period",
    values = c(very_low, med),
    labels = c("Pre", "Post")
  ) +
  labs(x = "Living Area", y = "Price", title = "Living Area and Price") +
  theme_minimal()
a
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Price per unit of living area against living area: points and smooths
# coloured by infection period, plus a dashed smooth over all observations
ggplot(
  data = data_factor,
  mapping = aes(x = area_living, y = sold_price / area_living)
) +
  geom_point(mapping = aes(colour = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(colour = infections_period)) +
  geom_smooth(linetype = "dashed", colour = "grey50") +
  theme_minimal()
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Actual vs. fit
# Model with non-linear addition: the all-inclusive specification plus a
# squared living-area term, fitted on the same data frame as lm_pre_alpha.
lm_pre_alpha_area <- lm(sold_price ~ . + I(area_living^2), data = data_factor_core)
# Print the fitted model's summary table
summ(lm_pre_alpha_area)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(66,24327) = 36741.79, p = 0.00
R² = 0.99
Adj. R² = 0.99
Standard errors: OLS
-------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- --------- -------- ------
(Intercept) -21671.88 9522.17 -2.28 0.02
property_typeDUP -1333.48 2871.86 -0.46 0.64
property_typeOTH -2804.77 2053.59 -1.37 0.17
property_typePAT -620.44 929.59 -0.67 0.50
property_typeSGL 1770.31 437.77 4.04 0.00
property_typeTNH 370.43 551.95 0.67 0.50
ac_typenone 62.25 381.06 0.16 0.87
ac_typenot_central -1498.05 246.37 -6.08 0.00
list_price 0.98 0.00 896.13 0.00
patio1 798.99 126.79 6.30 0.00
school_general1 241.58 161.60 1.49 0.13
photo_count -34.70 7.62 -4.55 0.00
pool1 -73.45 211.70 -0.35 0.73
roof_typeother 1098.57 233.01 4.71 0.00
roof_typeshingle 1920.08 261.94 7.33 0.00
roof_typeslate 536.02 1113.83 0.48 0.63
gas_typenatural 4855.78 8534.04 0.57 0.57
gas_typenone 4318.56 8530.00 0.51 0.61
gas_typepropane 87.56 8730.21 0.01 0.99
gas_typeunknown 3979.77 8529.01 0.47 0.64
out_building1 -490.59 137.56 -3.57 0.00
area_living 6.54 0.95 6.85 0.00
land_acres -285.71 154.41 -1.85 0.06
appliances1 921.60 172.47 5.34 0.00
garage1 666.84 126.78 5.26 0.00
property_conditionnew -3617.20 784.80 -4.61 0.00
property_conditionother -364.93 168.83 -2.16 0.03
energy_efficient1 601.45 141.63 4.25 0.00
exterior_typemetal 16.32 402.32 0.04 0.97
exterior_typeother 58.29 167.52 0.35 0.73
exterior_typevinyl 417.26 185.92 2.24 0.02
exterior_typewood -554.23 262.89 -2.11 0.04
exterior_featurescourtyard 2805.14 1465.90 1.91 0.06
exterior_featuresfence 1048.09 614.40 1.71 0.09
exterior_featuresnone 1584.20 615.59 2.57 0.01
exterior_featuresporch 1119.15 628.89 1.78 0.08
exterior_featurestennis_court 870.69 1724.92 0.50 0.61
fireplace1 264.36 131.42 2.01 0.04
foundation_typeslab 819.18 189.82 4.32 0.00
foundation_typeunspecified -213.55 228.49 -0.93 0.35
area_total -0.27 0.16 -1.71 0.09
beds_total1 -1072.82 3176.15 -0.34 0.74
beds_total2 -2553.13 3149.94 -0.81 0.42
beds_total3 -2327.60 3156.66 -0.74 0.46
beds_total4 -1389.50 3161.99 -0.44 0.66
beds_total5 -1954.03 3218.17 -0.61 0.54
bath_full1 3642.81 3358.78 1.08 0.28
bath_full2 3719.16 3356.69 1.11 0.27
bath_full3 3740.67 3367.07 1.11 0.27
bath_full4 -544.43 3761.07 -0.14 0.88
bath_full6 -3367.38 9198.35 -0.37 0.71
bath_half1 -274.20 167.01 -1.64 0.10
bath_half2 -1480.96 1099.19 -1.35 0.18
bath_half3 1451.22 6030.19 0.24 0.81
bath_half4 7762.17 8533.71 0.91 0.36
bath_half5 -8041.27 4933.05 -1.63 0.10
age -37.00 3.75 -9.87 0.00
dom -8.28 1.08 -7.66 0.00
sold_date 0.28 0.06 4.35 0.00
sewer_typeseptic -304.75 236.80 -1.29 0.20
sewer_typeunspecified 258.97 129.37 2.00 0.05
property_stylenot_mobile 2105.89 353.77 5.95 0.00
subdivision1 401.26 151.53 2.65 0.01
water_typewell 557.86 599.60 0.93 0.35
waterfront1 -1642.44 225.40 -7.29 0.00
bottom25_dom1 2331.25 158.82 14.68 0.00
I(area_living^2) -0.00 0.00 -8.04 0.00
-------------------------------------------------------------------------
# Model with single-variable fit: sold_price on area_living only, no controls
lm_pre_alpha_area_single <- lm(sold_price ~ area_living, data = data_factor_core)
# Print the fitted model's summary table
summ(lm_pre_alpha_area_single)
MODEL INFO:
Observations: 24412
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(1,24410) = 14244.19, p = 0.00
R² = 0.37
Adj. R² = 0.37
Standard errors: OLS
-------------------------------------------------------
Est. S.E. t val. p
----------------- ----------- --------- -------- ------
(Intercept) -20238.66 1644.55 -12.31 0.00
area_living 113.16 0.95 119.35 0.00
-------------------------------------------------------
# Marginal effects data frames
ggpredict_1 <- ggpredict(lm_pre_alpha, terms = "area_living") # total model
ggpredict_2 <- ggpredict(lm_pre_alpha_area, terms = "area_living") # non-linear addition
# NOTE(review): ggpredict_3 is not drawn by either plot in this section.
ggpredict_3 <- ggpredict(lm_pre_alpha_area_single, terms = "area_living") # single-variable fit
# Plots
# b: smoothed actual prices (grey) with both prediction curves (dashed) on top.
# The actual-price layer uses data_factor_core, the same data frame the models
# were fitted on and the one used by the matching age plot.
b <- ggplot(data_factor_core, aes(x = area_living)) +
  geom_smooth(data = data_factor_core, mapping = aes(y = sold_price), color = "grey50") +
  geom_smooth(data = ggpredict_1, mapping = aes(x, predicted), linetype = "dashed", color = very_low) +
  geom_smooth(data = ggpredict_2, mapping = aes(x, predicted), linetype = "dashed", color = med) +
  labs(title = "Living Area and Price",
       x = "Living Area",
       y = "Prediction")
# Look at area_living & area_living^2 alone to see impact on more relevant y-axis scale
c <- ggplot() +
  geom_smooth(data = ggpredict_1, mapping = aes(x, predicted), linetype = "dashed", color = very_low) +
  geom_smooth(data = ggpredict_2, mapping = aes(x, predicted), linetype = "dashed", color = med) +
  labs(title = "Living Area and Price",
       x = "Living Area",
       y = "Prediction")
# Conclusion
a
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Stack the actual-vs-fit panel (b) above the predictions-only panel (c)
gridExtra::grid.arrange(b,c, nrow =2, ncol = 1)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# General graphing
# Sold price against lot size: points and smooths coloured by infection
# period, plus a dashed smooth over all observations
ggplot(data = data_factor, mapping = aes(x = land_acres, y = sold_price)) +
  geom_point(mapping = aes(colour = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(colour = infections_period)) +
  geom_smooth(linetype = "dashed", colour = "grey50") +
  theme_minimal()
# Same layout with price per acre on the y-axis
ggplot(
  data = data_factor,
  mapping = aes(x = land_acres, y = sold_price / land_acres)
) +
  geom_point(mapping = aes(colour = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(colour = infections_period)) +
  geom_smooth(linetype = "dashed", colour = "grey50") +
  theme_minimal()
# Additions: copy the core data and append squared age and living-area terms.
# The squares are stored as plain numeric columns; I() is only needed to
# protect arithmetic inside a model formula and would otherwise just tag the
# columns with the AsIs class.
data_factor_core_clean <- data_factor_core
data_factor_core_clean$age_2 <- data_factor_core$age^2
data_factor_core_clean$area_living_2 <- data_factor_core$area_living^2
# Full model summary
summ(lm_pre_alpha)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(65,24328) = 37208.82, p = 0.00
R² = 0.99
Adj. R² = 0.99
Standard errors: OLS
-------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- --------- -------- ------
(Intercept) -16471.89 9512.56 -1.73 0.08
property_typeDUP -1567.54 2875.46 -0.55 0.59
property_typeOTH -2779.57 2056.27 -1.35 0.18
property_typePAT -565.21 930.77 -0.61 0.54
property_typeSGL 1807.60 438.32 4.12 0.00
property_typeTNH 529.68 552.32 0.96 0.34
ac_typenone -55.39 381.27 -0.15 0.88
ac_typenot_central -1637.94 246.08 -6.66 0.00
list_price 0.98 0.00 895.29 0.00
patio1 833.04 126.89 6.57 0.00
school_general1 224.32 161.80 1.39 0.17
photo_count -34.93 7.63 -4.58 0.00
pool1 -157.09 211.72 -0.74 0.46
roof_typeother 1179.35 233.09 5.06 0.00
roof_typeshingle 1981.04 262.17 7.56 0.00
roof_typeslate 561.38 1115.28 0.50 0.61
gas_typenatural 4523.85 8545.09 0.53 0.60
gas_typenone 3936.67 8541.01 0.46 0.64
gas_typepropane -96.15 8741.58 -0.01 0.99
gas_typeunknown 3661.78 8540.06 0.43 0.67
out_building1 -490.46 137.74 -3.56 0.00
area_living -0.81 0.27 -2.97 0.00
land_acres -291.00 154.61 -1.88 0.06
appliances1 928.60 172.69 5.38 0.00
garage1 700.15 126.88 5.52 0.00
property_conditionnew -3450.47 785.55 -4.39 0.00
property_conditionother -354.50 169.05 -2.10 0.04
energy_efficient1 592.05 141.81 4.18 0.00
exterior_typemetal -44.83 402.78 -0.11 0.91
exterior_typeother 54.80 167.73 0.33 0.74
exterior_typevinyl 410.53 186.16 2.21 0.03
exterior_typewood -611.49 263.14 -2.32 0.02
exterior_featurescourtyard 2804.70 1467.81 1.91 0.06
exterior_featuresfence 1047.68 615.20 1.70 0.09
exterior_featuresnone 1600.42 616.39 2.60 0.01
exterior_featuresporch 1133.90 629.71 1.80 0.07
exterior_featurestennis_court 718.57 1727.07 0.42 0.68
fireplace1 329.21 131.35 2.51 0.01
foundation_typeslab 813.97 190.06 4.28 0.00
foundation_typeunspecified -244.66 228.76 -1.07 0.28
area_total -0.20 0.16 -1.26 0.21
beds_total1 -654.41 3179.87 -0.21 0.84
beds_total2 -1146.38 3149.18 -0.36 0.72
beds_total3 -501.07 3152.57 -0.16 0.87
beds_total4 352.34 3158.67 0.11 0.91
beds_total5 -459.18 3216.99 -0.14 0.89
bath_full1 2422.94 3359.73 0.72 0.47
bath_full2 2893.97 3359.50 0.86 0.39
bath_full3 2434.57 3367.54 0.72 0.47
bath_full4 -2224.48 3760.16 -0.59 0.55
bath_full6 -3834.27 9210.17 -0.42 0.68
bath_half1 -365.47 166.84 -2.19 0.03
bath_half2 -1666.56 1100.38 -1.51 0.13
bath_half3 1695.30 6037.98 0.28 0.78
bath_half4 8512.66 8544.34 1.00 0.32
bath_half5 -8585.74 4939.03 -1.74 0.08
age -37.65 3.75 -10.03 0.00
dom -8.30 1.08 -7.68 0.00
sold_date 0.29 0.06 4.58 0.00
sewer_typeseptic -291.81 237.10 -1.23 0.22
sewer_typeunspecified 268.03 129.53 2.07 0.04
property_stylenot_mobile 2238.58 353.84 6.33 0.00
subdivision1 392.59 151.73 2.59 0.01
water_typewell 550.97 600.38 0.92 0.36
waterfront1 -1629.56 225.69 -7.22 0.00
bottom25_dom1 2327.32 159.03 14.63 0.00
-------------------------------------------------------------------------
# Check Variance Inflation Factors (VIF) of the all-inclusive model;
# multi-level factors are reported as generalised VIFs with their Df
VIF(lm_pre_alpha)
GVIF Df GVIF^(1/(2*Df))
property_type 1.655889 5 1.051727
ac_type 1.268434 2 1.061248
list_price 2.930822 1 1.711965
patio 1.348818 1 1.161386
school_general 1.910528 1 1.382219
photo_count 1.405325 1 1.185464
pool 1.127715 1 1.061939
roof_type 1.708888 3 1.093416
gas_type 1.908222 4 1.084123
out_building 1.178861 1 1.085754
area_living 5.267544 1 2.295113
land_acres 1.735039 1 1.317209
appliances 1.400917 1 1.183604
garage 1.338709 1 1.157026
property_condition 1.574892 2 1.120244
energy_efficient 1.514844 1 1.230790
exterior_type 2.441128 4 1.118018
exterior_features 1.621144 5 1.049499
fireplace 1.416227 1 1.190054
foundation_type 1.809861 2 1.159875
area_total 4.294386 1 2.072290
beds_total 2.959420 5 1.114604
bath_full 2.956996 5 1.114513
bath_half 1.316344 5 1.027867
age 1.425310 1 1.193863
dom 1.685410 1 1.298233
sold_date 1.877109 1 1.370076
sewer_type 1.317497 2 1.071365
property_style 1.294108 1 1.137589
subdivision 1.156436 1 1.075377
water_type 1.060300 1 1.029709
waterfront 1.094766 1 1.046311
bottom25_dom 1.677313 1 1.295111
# Look for linearly dependent (aliased) terms in the all-inclusive model
alias(lm_pre_alpha)
Model :
sold_price ~ property_type + ac_type + list_price + patio + school_general +
photo_count + pool + roof_type + gas_type + out_building +
area_living + land_acres + appliances + garage + property_condition +
energy_efficient + exterior_type + exterior_features + fireplace +
foundation_type + area_total + beds_total + bath_full + bath_half +
age + dom + sold_date + sewer_type + property_style + subdivision +
water_type + waterfront + bottom25_dom
# Living area has the largest VIF (5.27, above the VIF > 5 rule of thumb) and
# total area the next largest (4.29); the two are multicollinear, as expected.
# Solution: Remove area_total
# Note: R^2 drops from 0.99 to 0.86, but this model also switches the
# dependent variable to log(sold_price), so the two R^2 values are not directly
# comparable and the drop cannot be attributed to removing area_total alone.
lm_pre_alpha_cleaned <- lm(log(sold_price) ~ . - area_total ,data = data_factor_core)
summ(lm_pre_alpha_cleaned)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: log(sold_price)
Type: OLS linear regression
MODEL FIT:
F(64,24329) = 2345.73, p = 0.00
R² = 0.86
Adj. R² = 0.86
Standard errors: OLS
------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ------- ------ -------- ------
(Intercept) 10.62 0.29 37.18 0.00
property_typeDUP -0.25 0.09 -2.90 0.00
property_typeOTH 0.03 0.06 0.44 0.66
property_typePAT -0.09 0.03 -3.05 0.00
property_typeSGL -0.06 0.01 -4.25 0.00
property_typeTNH 0.02 0.02 0.96 0.34
ac_typenone -0.60 0.01 -52.20 0.00
ac_typenot_central -0.16 0.01 -21.82 0.00
list_price 0.00 0.00 200.27 0.00
patio1 0.01 0.00 3.69 0.00
school_general1 0.01 0.00 2.07 0.04
photo_count 0.00 0.00 6.42 0.00
pool1 -0.04 0.01 -5.79 0.00
roof_typeother 0.04 0.01 5.08 0.00
roof_typeshingle 0.04 0.01 5.61 0.00
roof_typeslate 0.07 0.03 2.08 0.04
gas_typenatural -0.02 0.26 -0.09 0.92
gas_typenone 0.04 0.26 0.14 0.89
gas_typepropane -0.09 0.26 -0.36 0.72
gas_typeunknown 0.03 0.26 0.10 0.92
out_building1 0.02 0.00 5.76 0.00
area_living -0.00 0.00 -9.43 0.00
land_acres 0.04 0.00 8.23 0.00
appliances1 0.13 0.01 25.36 0.00
garage1 0.01 0.00 3.64 0.00
property_conditionnew -0.10 0.02 -4.30 0.00
property_conditionother -0.05 0.01 -9.13 0.00
energy_efficient1 0.02 0.00 3.77 0.00
exterior_typemetal -0.04 0.01 -3.65 0.00
exterior_typeother -0.01 0.01 -2.25 0.02
exterior_typevinyl 0.02 0.01 3.31 0.00
exterior_typewood -0.03 0.01 -3.98 0.00
exterior_featurescourtyard -0.02 0.04 -0.50 0.62
exterior_featuresfence 0.05 0.02 2.58 0.01
exterior_featuresnone 0.05 0.02 2.58 0.01
exterior_featuresporch 0.05 0.02 2.65 0.01
exterior_featurestennis_court 0.01 0.05 0.13 0.89
fireplace1 -0.01 0.00 -1.59 0.11
foundation_typeslab 0.11 0.01 19.35 0.00
foundation_typeunspecified 0.07 0.01 10.61 0.00
beds_total1 -0.30 0.10 -3.13 0.00
beds_total2 -0.27 0.09 -2.84 0.00
beds_total3 -0.22 0.09 -2.32 0.02
beds_total4 -0.25 0.09 -2.69 0.01
beds_total5 -0.22 0.10 -2.28 0.02
bath_full1 -0.20 0.10 -2.02 0.04
bath_full2 -0.11 0.10 -1.09 0.27
bath_full3 -0.20 0.10 -1.95 0.05
bath_full4 -0.20 0.11 -1.80 0.07
bath_full6 0.19 0.28 0.70 0.48
bath_half1 -0.03 0.01 -5.61 0.00
bath_half2 -0.04 0.03 -1.20 0.23
bath_half3 -0.12 0.18 -0.67 0.50
bath_half4 -0.36 0.26 -1.42 0.16
bath_half5 -0.05 0.15 -0.33 0.74
age 0.00 0.00 4.74 0.00
dom -0.00 0.00 -1.33 0.18
sold_date 0.00 0.00 3.32 0.00
sewer_typeseptic 0.01 0.01 1.83 0.07
sewer_typeunspecified 0.01 0.00 2.83 0.00
property_stylenot_mobile 0.21 0.01 19.52 0.00
subdivision1 -0.01 0.00 -2.56 0.01
water_typewell -0.01 0.02 -0.70 0.48
waterfront1 -0.02 0.01 -3.02 0.00
bottom25_dom1 0.03 0.00 5.96 0.00
------------------------------------------------------------------
# Re-check VIFs once area_total has been removed from the model
VIF(lm_pre_alpha_cleaned)
GVIF Df GVIF^(1/(2*Df))
property_type 1.630912 5 1.050130
ac_type 1.267422 2 1.061036
list_price 2.851824 1 1.688734
patio 1.337380 1 1.156452
school_general 1.910521 1 1.382216
photo_count 1.404206 1 1.184992
pool 1.127687 1 1.061926
roof_type 1.693280 3 1.091746
gas_type 1.895309 4 1.083203
out_building 1.160958 1 1.077477
area_living 3.154333 1 1.776044
land_acres 1.713950 1 1.309179
appliances 1.400768 1 1.183540
garage 1.314365 1 1.146458
property_condition 1.565416 2 1.118555
energy_efficient 1.509515 1 1.228623
exterior_type 2.438052 4 1.117842
exterior_features 1.620152 5 1.049435
fireplace 1.416222 1 1.190051
foundation_type 1.806271 2 1.159300
beds_total 2.953692 5 1.114388
bath_full 2.946496 5 1.114116
bath_half 1.310540 5 1.027413
age 1.424511 1 1.193529
dom 1.684184 1 1.297761
sold_date 1.873621 1 1.368803
sewer_type 1.315119 2 1.070881
property_style 1.290468 1 1.135988
subdivision 1.155881 1 1.075119
water_type 1.059113 1 1.029132
waterfront 1.094521 1 1.046194
bottom25_dom 1.677311 1 1.295110
# Final VIF check on the cleaned model
# NOTE(review): this repeats the VIF call made just above on the same model.
VIF(lm_pre_alpha_cleaned)
GVIF Df GVIF^(1/(2*Df))
property_type 1.630912 5 1.050130
ac_type 1.267422 2 1.061036
list_price 2.851824 1 1.688734
patio 1.337380 1 1.156452
school_general 1.910521 1 1.382216
photo_count 1.404206 1 1.184992
pool 1.127687 1 1.061926
roof_type 1.693280 3 1.091746
gas_type 1.895309 4 1.083203
out_building 1.160958 1 1.077477
area_living 3.154333 1 1.776044
land_acres 1.713950 1 1.309179
appliances 1.400768 1 1.183540
garage 1.314365 1 1.146458
property_condition 1.565416 2 1.118555
energy_efficient 1.509515 1 1.228623
exterior_type 2.438052 4 1.117842
exterior_features 1.620152 5 1.049435
fireplace 1.416222 1 1.190051
foundation_type 1.806271 2 1.159300
beds_total 2.953692 5 1.114388
bath_full 2.946496 5 1.114116
bath_half 1.310540 5 1.027413
age 1.424511 1 1.193529
dom 1.684184 1 1.297761
sold_date 1.873621 1 1.368803
sewer_type 1.315119 2 1.070881
property_style 1.290468 1 1.135988
subdivision 1.155881 1 1.075119
water_type 1.059113 1 1.029132
waterfront 1.094521 1 1.046194
bottom25_dom 1.677311 1 1.295110
# Look for linearly dependent (aliased) terms in the cleaned model
alias(lm_pre_alpha_cleaned)
Model :
log(sold_price) ~ (property_type + ac_type + list_price + patio +
school_general + photo_count + pool + roof_type + gas_type +
out_building + area_living + land_acres + appliances + garage +
property_condition + energy_efficient + exterior_type + exterior_features +
fireplace + foundation_type + area_total + beds_total + bath_full +
bath_half + age + dom + sold_date + sewer_type + property_style +
subdivision + water_type + waterfront + bottom25_dom) - area_total
# Another way to check for multicollinearity is visually through the mcvis package
# Step 1: run mcvis on every numeric column of data_factor_core
data_numeric <- select_if(data_factor_core, is.numeric) # Subset numeric columns with dplyr
mcvis_result <- mcvis(X = data_numeric)
a <- plot(mcvis_result)
# Switch the base-graphics device to a 2 x 2 layout
par(mfrow = c(2,2))
# Step 2 (removals): drop list_price and re-run mcvis
data_numeric <- subset(data_numeric, select = -c(list_price))
mcvis_result <- mcvis(X = data_numeric)
b <- plot(mcvis_result)
# Step 3 (removals): additionally drop area_total and re-run mcvis
data_numeric <- subset(data_numeric, select = -c(area_total))
mcvis_result <- mcvis(X = data_numeric)
c <- plot(mcvis_result)
# Display the three stored plots in order (all columns, without list_price,
# without list_price and area_total)
a
b
c
NA
NA
NA
# Removals
# - Area_total
# - Listing price
par(mfrow = c(2,2))
# Drop both columns from the working copy of the data
data_factor_core_clean <- subset(data_factor_core_clean, select = -c(area_total, list_price))
# Drop one observation by row position.
# NOTE(review): 23515 is a hard-coded row index and the reason for removing it
# is not recorded here — confirm it still points at the intended row.
data_factor_core_clean <- data_factor_core_clean[-c(23515), ]
# Start a 5-worker PSOCK cluster and register it as the parallel backend.
# NOTE(review): no stopCluster(cl) is visible in this section — confirm the
# cluster is released later.
cl <- makePSOCKcluster(5)
registerDoParallel(cl)
# NOTE(review): lm_alpha is not defined in this section — presumably fitted
# elsewhere; verify.
tab_model(lm_alpha, ci_method = "wald")
Profiled confidence intervals may take longer time to compute. Use 'ci_method="wald"' for faster computation of CIs.
# Waves of infection
# Daily infections (3-month moving average) by sale date, with a dashed smooth
# and a vertical marker at 2020-03-23.
# The upper y limit is read from data_factor explicitly: a bare column name
# outside aes() only resolves if a same-named object exists on the search path,
# and na.rm = TRUE keeps missing values from turning the limit into NA.
ggplot(data_factor, aes(x = as.Date(sold_date), y = infections_3mma)) +
  geom_point(color = low, alpha = 0.7) +
  geom_smooth(linetype = "dashed", color = med) +
  theme_minimal() +
  scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  scale_y_continuous(limits = c(0, max(data_factor$infections_3mma, na.rm = TRUE))) +
  xlab(" ") +
  ylab("Confirmed Infections per Day") +
  labs(title = "Waves of Infection",
       caption = "") +
  geom_vline(xintercept = as.numeric(as.Date("2020-03-23")), linetype = 4)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 17731 rows containing non-finite values (stat_smooth).
Warning: Removed 17731 rows containing missing values (geom_point).
Warning: Removed 3 rows containing missing values (geom_smooth).
# Accumulation of infections
# Cumulative infections (in thousands) by sale date, with a dashed smooth.
# The division is written directly inside aes(): wrapping a position aesthetic
# in I() makes ggplot2 (from 3.5.0) treat the values as panel-relative
# coordinates instead of data values.
# The upper y limit is read from data_factor explicitly, ignoring NAs.
ggplot(data_factor, aes(x = as.Date(sold_date), y = infections_accum / 1000)) +
  geom_point(color = low, alpha = 0.7) +
  geom_smooth(linetype = "dashed", color = med) +
  theme_minimal() +
  scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  scale_y_continuous(limits = c(0, max(data_factor$infections_accum / 1000, na.rm = TRUE))) +
  xlab(" ") +
  ylab("Accumulation of Infections (in 000's)") +
  labs(title = "Accumulation of Infections",
       caption = "")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 17731 rows containing non-finite values (stat_smooth).
Warning: Removed 17731 rows containing missing values (geom_point).
Warning: Removed 3 rows containing missing values (geom_smooth).
# Infections and home prices
# Dashed smooth of sold price against the 3-month moving average of daily
# infections (in thousands).
# The division is written directly inside aes(): wrapping a position aesthetic
# in I() makes ggplot2 (from 3.5.0) treat the values as panel-relative
# coordinates instead of data values.
# The upper x limit is read from data_factor explicitly, ignoring NAs.
ggplot(data_factor, aes(x = infections_3mma / 1000, y = sold_price)) +
  #geom_point() +
  geom_smooth(linetype = "dashed", color = med) +
  theme_minimal() +
  scale_x_continuous(limits = c(0, max(data_factor$infections_3mma / 1000, na.rm = TRUE))) +
  xlab("3-Month Moving Average of Daily Infections (in 000's)") +
  ylab("Sold Price (Actual)") +
  labs(title = "Infections and Price",
       caption = "")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# "#ff6c67", "#00c2c6"
# Horizontal violin + box plot of sold price (in thousands) per infection period.
# Only one fill scale is added: a second fill scale replaces the first, so the
# viridis scale that used to precede scale_fill_manual() had no effect beyond
# a "Scale for 'fill' is already present" message.
# NOTE(review): the caption text "e" looks like a leftover — confirm the intended caption.
ggplot(data_factor, aes(x = infections_period, y = sold_price/1000, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none") +
  xlab("Infections Present (1 = yes)") +
  ylab("Sold Price (in 000's)") +
  scale_fill_manual(values = c(very_low, med)) +
  labs(title = "Comparison of Sold Price",
       caption = "e")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Plots
# Smoothed actual prices against infections_3mma with two sets of model
# predictions overlaid as dashed curves
ggplot(data = data_factor_core, mapping = aes(x = infections_3mma)) +
  geom_smooth(
    data = data_factor_core,
    mapping = aes(y = sold_price),
    colour = "grey50"
  ) + # Actual Data
  geom_smooth(
    data = ggpredict_1,
    mapping = aes(x = x, y = predicted),
    colour = low,
    linetype = "dashed"
  ) + # Controlled model
  geom_smooth(
    data = ggpredict_2,
    mapping = aes(x = x, y = predicted),
    colour = med,
    linetype = "dashed"
  ) + # Best single fit
  labs(title = "Model Fit Overview")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Draw each stored plot (a through e) on its own page, in order
invisible(lapply(list(a, b, c, d, e), gridExtra::grid.arrange))
Ideas
# Coefficient t-tests using HC0 heteroskedasticity-consistent standard errors.
# Only `type` selects the estimator here; `method` is a plm-style argument and
# is not passed.
# NOTE(review): assumes lm_corona_bedrooms is an lm fit — confirm.
coeftest(lm_corona_bedrooms, vcov = vcovHC(lm_corona_bedrooms, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.3414e+05 3.0157e+04 7.7640 8.553e-15 ***
ac_typenone -5.4498e+04 1.9525e+03 -27.9111 < 2.2e-16 ***
ac_typenot_central -2.2087e+04 1.6931e+03 -13.0457 < 2.2e-16 ***
patio1 1.2472e+04 8.3412e+02 14.9527 < 2.2e-16 ***
school_general1 8.2759e+03 1.0945e+03 7.5614 4.129e-14 ***
photo_count 1.3248e+03 4.9480e+01 26.7740 < 2.2e-16 ***
pool1 1.9075e+04 1.5001e+03 12.7159 < 2.2e-16 ***
roof_typeother 7.3681e+03 1.4621e+03 5.0392 4.708e-07 ***
roof_typeshingle 2.7197e+04 1.6956e+03 16.0396 < 2.2e-16 ***
roof_typeslate 1.5496e+04 9.0682e+03 1.7088 0.0874995 .
gas_typenatural -1.0756e+05 3.4559e+03 -31.1233 < 2.2e-16 ***
gas_typenone -1.3865e+05 2.2971e+03 -60.3594 < 2.2e-16 ***
gas_typepropane -9.3236e+04 1.8180e+04 -5.1285 2.943e-07 ***
gas_typeunknown -1.3842e+05 2.1427e+03 -64.5988 < 2.2e-16 ***
out_building1 -5.5192e+03 8.8805e+02 -6.2149 5.218e-10 ***
appliances1 2.5898e+04 1.1928e+03 21.7118 < 2.2e-16 ***
property_conditionnew -2.0935e+04 6.3471e+03 -3.2983 0.0009741 ***
property_conditionother -2.0956e+04 1.0429e+03 -20.0948 < 2.2e-16 ***
energy_efficient1 1.8928e+04 8.8970e+02 21.2746 < 2.2e-16 ***
exterior_typemetal -4.0964e+03 2.4309e+03 -1.6852 0.0919667 .
exterior_typeother 1.3327e+04 1.1559e+03 11.5302 < 2.2e-16 ***
exterior_typevinyl 3.0630e+03 1.2148e+03 2.5213 0.0116992 *
exterior_typewood 6.8287e+02 1.8873e+03 0.3618 0.7174878
exterior_featurescourtyard 3.8981e+04 1.4928e+04 2.6113 0.0090249 **
exterior_featuresfence -2.3394e+04 5.4658e+03 -4.2800 1.876e-05 ***
exterior_featuresnone -1.3995e+04 5.4825e+03 -2.5528 0.0106928 *
exterior_featuresporch -2.0091e+04 5.5495e+03 -3.6203 0.0002948 ***
exterior_featurestennis_court 2.3977e+04 1.3892e+04 1.7260 0.0843658 .
fireplace1 3.1903e+04 8.3534e+02 38.1915 < 2.2e-16 ***
foundation_typeslab 2.0170e+04 1.3210e+03 15.2687 < 2.2e-16 ***
foundation_typeunspecified 9.7919e+03 1.4755e+03 6.6362 3.286e-11 ***
beds_total1 -7.1968e+04 2.9707e+04 -2.4226 0.0154172 *
beds_total2 -5.4848e+04 2.9462e+04 -1.8616 0.0626666 .
beds_total3 -2.8699e+04 2.9463e+04 -0.9741 0.3300308
beds_total4 1.0509e+04 2.9483e+04 0.3564 0.7215164
beds_total5 1.7689e+04 3.0042e+04 0.5888 0.5559999
age -2.1730e+03 8.0380e+01 -27.0343 < 2.2e-16 ***
dom 8.3326e+00 6.9823e+00 1.1934 0.2327235
sewer_typeseptic -4.5359e+03 1.5203e+03 -2.9836 0.0028514 **
sewer_typeunspecified -4.4686e+03 8.1555e+02 -5.4792 4.314e-08 ***
property_stylenot_mobile 7.2464e+04 1.8129e+03 39.9708 < 2.2e-16 ***
subdivision1 2.7805e+03 9.7726e+02 2.8452 0.0044416 **
water_typewell -3.2549e+03 4.4298e+03 -0.7348 0.4624852
waterfront1 2.7540e+04 1.6343e+03 16.8507 < 2.2e-16 ***
bottom25_dom1 1.2415e+04 1.0822e+03 11.4716 < 2.2e-16 ***
age_2 1.9417e+01 1.1018e+00 17.6236 < 2.2e-16 ***
data_factor$infections_3mma -2.8687e+01 1.5003e+01 -1.9121 0.0558708 .
beds_total1:data_factor$infections_3mma 2.5212e+01 1.5454e+01 1.6314 0.1028173
beds_total2:data_factor$infections_3mma 3.2225e+01 1.5051e+01 2.1411 0.0322755 *
beds_total3:data_factor$infections_3mma 3.6766e+01 1.5014e+01 2.4488 0.0143384 *
beds_total4:data_factor$infections_3mma 3.7085e+01 1.5053e+01 2.4637 0.0137574 *
beds_total5:data_factor$infections_3mma 4.7405e+01 1.6056e+01 2.9525 0.0031552 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Find the mean of each group
# One row per infections_period level holding the mean sold price (NAs ignored).
# NOTE(review): plyr is attached here, after dplyr functions have already been
# used in this file; attaching plyr second masks several dplyr verbs — confirm
# nothing later relies on the dplyr versions.
library(plyr)
price_means <- ddply(
  .data = data_factor,
  .variables = "infections_period",
  .fun = summarise,
  price_mean = mean(sold_price, na.rm = TRUE)
)
# Distribution: Total
# Density of sold price over all observations with a dashed line at the
# overall mean. The mean is computed from data_factor directly and passed as a
# fixed xintercept: the per-group table price_means has no sold_price column,
# so mapping mean(sold_price) against it only worked if a same-named object
# happened to exist on the search path.
ggplot(data_factor, aes(x = sold_price)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Price Distribution") +
  geom_vline(xintercept = mean(data_factor$sold_price, na.rm = TRUE),
             linetype = "dashed", size = 0.4, color = very_low, alpha = 0.8) +
  xlab("Sold Price") +
  ylab("Density")
# Distribution: Infection
# Overlaid sold-price densities per infection period with a dashed line at each
# group mean. The means are passed as fixed xintercept values rather than as a
# constant inside aes() with data = price_means, which drew every line once per
# row of price_means.
# NOTE(review): assumes row 1 of price_means is the "Pre" group and row 2 the
# "Post" group, matching the colour order below — confirm.
ggplot(data_factor, aes(x = sold_price, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Price Distributions") +
  geom_vline(xintercept = price_means$price_mean[2],
             linetype = "dashed", size = 0.4, color = med, alpha = 0.8) +
  geom_vline(xintercept = price_means$price_mean[1],
             linetype = "dashed", size = 0.4, color = very_low, alpha = 0.8) +
  scale_fill_manual(values = c(very_low, med),
                    name = "Infection Period",
                    labels = c("Pre", "Post")) +
  xlab("Sold Price") +
  ylab("Density")
# Distribution: Top vs. Bottom
# Sold-price densities per infection period, one facet row for each
# combination of top25_sold_price and bottom25_sold_price, with free scales.
# A single fill scale carries the colours, legend title and labels; adding a
# second fill scale would only replace the first.
ggplot(data_factor) +
  geom_density(aes(x = sold_price, fill = infections_period), alpha = 0.5, position = "identity") +
  facet_grid(vars(top25_sold_price, bottom25_sold_price), scales = "free") +
  ggtitle("Price Distributions") +
  xlab("Sold Price") +
  ylab("Density") +
  scale_fill_manual(values = c(very_low, med),
                    name = "Infection Period",
                    labels = c("Pre", "Post"))
# Price and Infections
# Horizontal violin + box plot of sold_price for each infections_period.
ggplot(data_factor, aes(x = infections_period, y = sold_price, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  # Only the manual fill scale is kept: the scale_fill_viridis() that used to
  # precede it was discarded by ggplot2 when this scale replaced it.
  scale_fill_manual(values = c(very_low, med)) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)) +
  ggtitle("Comparison of Sold Price") +
  xlab("Infection Period") +
  ylab("Sold Price")
# Coefficient tests for lm_corona_price_bottom using an HC0
# heteroskedasticity-consistent covariance matrix.
# `method = "White2"` was dropped: assuming the model is an lm fit, sandwich's
# vcovHC() has no `method` argument (it belongs to the plm panel method) and
# silently ignored it, so the estimator is selected by `type` alone.
coeftest(lm_corona_price_bottom, vcov = vcovHC(lm_corona_price_bottom, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6017e+05 2.1950e+04 11.8531 < 2.2e-16 ***
property_typeDUP -2.1684e+04 1.5731e+04 -1.3784 0.1680918
property_typeOTH 7.3064e+03 8.1447e+03 0.8971 0.3696882
property_typePAT 9.6853e+03 4.8857e+03 1.9824 0.0474454 *
property_typeSGL 1.8003e+04 2.2688e+03 7.9351 2.194e-15 ***
property_typeTNH -4.5428e+03 2.8170e+03 -1.6126 0.1068353
ac_typenone -2.4963e+04 1.3345e+03 -18.7056 < 2.2e-16 ***
ac_typenot_central -3.5920e+03 1.2040e+03 -2.9833 0.0028541 **
patio1 4.1811e+03 6.4168e+02 6.5158 7.372e-11 ***
school_general1 6.8812e+03 8.4726e+02 8.1217 4.811e-16 ***
photo_count 5.9194e+02 3.8741e+01 15.2795 < 2.2e-16 ***
pool1 1.0669e+04 1.2351e+03 8.6380 < 2.2e-16 ***
roof_typeother -3.3318e+02 1.1443e+03 -0.2912 0.7709303
roof_typeshingle 1.1199e+04 1.3489e+03 8.3025 < 2.2e-16 ***
roof_typeslate 2.6920e+03 7.2137e+03 0.3732 0.7090169
gas_typenatural -7.3831e+04 2.9774e+03 -24.7972 < 2.2e-16 ***
gas_typenone -1.0662e+05 2.0419e+03 -52.2141 < 2.2e-16 ***
gas_typepropane -6.9963e+04 1.4886e+04 -4.7000 2.616e-06 ***
gas_typeunknown -1.0796e+05 1.9714e+03 -54.7669 < 2.2e-16 ***
out_building1 -6.4916e+03 6.8667e+02 -9.4537 < 2.2e-16 ***
area_living -6.9603e+00 5.2515e+00 -1.3254 0.1850514
land_acres 1.9796e+03 7.4583e+02 2.6542 0.0079554 **
appliances1 1.0788e+04 8.4929e+02 12.7019 < 2.2e-16 ***
garage1 6.8509e+03 6.3401e+02 10.8056 < 2.2e-16 ***
property_conditionnew -8.4140e+03 5.3106e+03 -1.5844 0.1131184
property_conditionother -1.0277e+04 8.3230e+02 -12.3471 < 2.2e-16 ***
energy_efficient1 1.0499e+04 7.0947e+02 14.7989 < 2.2e-16 ***
exterior_typemetal -8.7770e+02 1.8545e+03 -0.4733 0.6360072
exterior_typeother 7.9162e+03 8.8607e+02 8.9341 < 2.2e-16 ***
exterior_typevinyl 1.8030e+03 9.2587e+02 1.9474 0.0515005 .
exterior_typewood 2.4987e+03 1.3755e+03 1.8166 0.0692898 .
exterior_featurescourtyard 2.3714e+04 1.2638e+04 1.8764 0.0606085 .
exterior_featuresfence -2.4910e+04 4.2901e+03 -5.8063 6.466e-09 ***
exterior_featuresnone -2.0388e+04 4.2886e+03 -4.7539 2.007e-06 ***
exterior_featuresporch -2.4831e+04 4.3365e+03 -5.7260 1.040e-08 ***
exterior_featurestennis_court 2.3169e+03 1.0105e+04 0.2293 0.8186463
fireplace1 1.0708e+04 6.8071e+02 15.7301 < 2.2e-16 ***
foundation_typeslab 4.5790e+03 1.0141e+03 4.5155 6.348e-06 ***
foundation_typeunspecified 2.5181e+03 1.0937e+03 2.3023 0.0213285 *
beds_total1 -8.8157e+03 2.0732e+04 -0.4252 0.6706851
beds_total2 -1.8691e+04 2.0643e+04 -0.9055 0.3652225
beds_total3 -2.6547e+04 2.0670e+04 -1.2843 0.1990422
beds_total4 -2.0696e+04 2.0696e+04 -1.0000 0.3173106
beds_total5 -3.5958e+04 2.1091e+04 -1.7049 0.0882328 .
bath_full1 -1.4452e+04 1.3423e+04 -1.0766 0.2816580
bath_full2 -7.5442e+03 1.3405e+04 -0.5628 0.5735714
bath_full3 1.4197e+04 1.3524e+04 1.0497 0.2938466
bath_full4 7.3672e+03 2.0346e+04 0.3621 0.7172836
bath_full6 4.0238e+04 1.4399e+04 2.7945 0.0052018 **
bath_half1 1.1964e+04 9.6620e+02 12.3824 < 2.2e-16 ***
bath_half2 2.3815e+04 7.0879e+03 3.3599 0.0007808 ***
bath_half3 5.8803e+04 9.6367e+03 6.1020 1.063e-09 ***
bath_half4 1.0372e+05 2.8037e+03 36.9928 < 2.2e-16 ***
bath_half5 -2.7354e+04 2.1637e+04 -1.2642 0.2061652
age -1.5156e+03 6.6548e+01 -22.7751 < 2.2e-16 ***
dom -1.1463e+01 5.3153e+00 -2.1567 0.0310414 *
sewer_typeseptic -6.1909e+03 1.1518e+03 -5.3749 7.732e-08 ***
sewer_typeunspecified -4.5154e+03 6.2787e+02 -7.1916 6.591e-13 ***
property_stylenot_mobile 2.8815e+04 1.5438e+03 18.6650 < 2.2e-16 ***
subdivision1 2.4505e+03 7.3353e+02 3.3407 0.0008370 ***
water_typewell 2.4004e+03 3.2270e+03 0.7438 0.4569754
waterfront1 1.7312e+04 1.2796e+03 13.5289 < 2.2e-16 ***
bottom25_dom1 7.6382e+03 8.1707e+02 9.3483 < 2.2e-16 ***
age_2 1.3323e+01 9.1771e-01 14.5177 < 2.2e-16 ***
area_living_2 1.5910e-02 1.5353e-03 10.3627 < 2.2e-16 ***
data_factor$infections_3mma 7.8359e+00 4.6475e-01 16.8604 < 2.2e-16 ***
bottom25_sold_price -7.9381e+04 7.9732e+02 -99.5596 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_sold_price -5.1963e+00 7.5026e-01 -6.9259 4.440e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Conditional Mean
# Mean property age within each infections_period group; reused below for the
# per-period reference lines.
library(plyr)
age_mean_data <- ddply(data_factor, "infections_period", summarise,
                       age_mean = mean(age, na.rm = TRUE))
# Distribution: Total
# Density of age over all observations with a dashed line at the overall
# mean. The mean is NA-safe (consistent with age_mean_data) and passed as a
# constant so that a single line is drawn instead of one per data row.
ggplot(data_factor, aes(x = age)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Age Distribution") +
  geom_vline(xintercept = mean(data_factor$age, na.rm = TRUE),
             linetype = "dashed", size = 0.4, alpha = 0.5, color = very_low) +
  xlab("Age of Property") +
  ylab("Density")
# Distribution: Infection
# Density of age split by infections_period, with one dashed reference line
# per group taken from age_mean_data.
# NOTE(review): rows 1 and 2 of age_mean_data are presumed to be the "Pre"
# and "Post" groups, matching the order of the fill colours - confirm.
ggplot(data_factor, aes(x = age, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Age Distributions") +
  scale_fill_manual(values = c(very_low, med),
                    name = "Infection Period",
                    labels = c("Pre", "Post")) +
  # Constant xintercepts draw each line exactly once; mapping them through
  # aes() with `data = age_mean_data` drew every line once per row of that
  # table, overplotting it and distorting the requested alpha.
  geom_vline(xintercept = age_mean_data[2, 2], linetype = "dashed",
             size = 0.5, color = med, alpha = 0.8) +
  geom_vline(xintercept = age_mean_data[1, 2], linetype = "dashed",
             size = 0.5, alpha = 0.8, color = very_low) +
  xlab("Age of Property") +
  ylab("Density")
# The interactive help lookup `?scale_fill_discrete()` that followed this plot
# was removed; it has no place in a non-interactive script.
# Distribution: Top vs. Bottom
# Age densities by infections_period, one facet row per combination of the
# top25_age / bottom25_age indicators, each with free scales.
ggplot(data_factor, aes(x = age, fill = infections_period)) +
  geom_density(position = "identity", alpha = 0.5) +
  facet_grid(rows = vars(top25_age, bottom25_age), scales = "free") +
  scale_fill_manual(name = "Infection Period",
                    labels = c("Pre", "Post"),
                    values = c(very_low, med)) +
  labs(title = "Age Distributions",
       x = "Age of Property",
       y = "Density",
       fill = "Infection Period")
# Age on Infections
# Horizontal violin + box plot of age for each infections_period.
ggplot(data_factor, aes(x = infections_period, y = age, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)) +
  ggtitle("Comparison of Age") +
  xlab("Infection Period") +
  ylab("Age of Property") +
  # Only the manual fill scale is kept: the scale_fill_viridis() that used to
  # precede it was replaced by this scale and only produced a
  # "Scale for 'fill' is already present" message.
  scale_fill_manual(values = c(very_low, med),
                    name = "Infection Period",
                    labels = c("Pre", "Post"))
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests for lm_corona_age_bottom using an HC0
# heteroskedasticity-consistent covariance matrix.
# `method = "White2"` was dropped: assuming the model is an lm fit, sandwich's
# vcovHC() has no `method` argument (it belongs to the plm panel method) and
# silently ignored it, so the estimator is selected by `type` alone.
coeftest(lm_corona_age_bottom, vcov = vcovHC(lm_corona_age_bottom, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.2697e+05 3.1235e+04 4.0649 4.821e-05 ***
ac_typenone -4.4551e+04 1.9540e+03 -22.8002 < 2.2e-16 ***
ac_typenot_central -1.4115e+04 1.5258e+03 -9.2508 < 2.2e-16 ***
patio1 8.8898e+03 7.5883e+02 11.7151 < 2.2e-16 ***
school_general1 1.0875e+04 1.0179e+03 10.6833 < 2.2e-16 ***
photo_count 8.7047e+02 4.7073e+01 18.4919 < 2.2e-16 ***
pool1 8.6980e+03 1.3425e+03 6.4787 9.426e-11 ***
roof_typeother 3.2938e+03 1.4117e+03 2.3331 0.0196485 *
roof_typeshingle 2.1981e+04 1.6190e+03 13.5770 < 2.2e-16 ***
roof_typeslate 7.4667e+03 8.7967e+03 0.8488 0.3959985
gas_typenatural -9.3517e+04 3.5479e+03 -26.3584 < 2.2e-16 ***
gas_typenone -1.2593e+05 2.5036e+03 -50.3013 < 2.2e-16 ***
gas_typepropane -9.3374e+04 1.8322e+04 -5.0962 3.491e-07 ***
gas_typeunknown -1.2906e+05 2.4005e+03 -53.7657 < 2.2e-16 ***
out_building1 -6.3411e+03 8.0363e+02 -7.8906 3.132e-15 ***
land_acres 3.7274e+03 9.3788e+02 3.9743 7.080e-05 ***
appliances1 2.5344e+04 1.1141e+03 22.7473 < 2.2e-16 ***
garage1 1.3975e+04 7.4840e+02 18.6729 < 2.2e-16 ***
property_conditionnew -5.0567e+03 6.2096e+03 -0.8143 0.4154609
property_conditionother -2.0425e+04 9.3375e+02 -21.8744 < 2.2e-16 ***
energy_efficient1 1.5099e+04 8.2260e+02 18.3556 < 2.2e-16 ***
exterior_typemetal -2.5826e+02 2.3574e+03 -0.1096 0.9127626
exterior_typeother 1.2023e+04 1.0621e+03 11.3193 < 2.2e-16 ***
exterior_typevinyl 5.5811e+03 1.0999e+03 5.0740 3.924e-07 ***
exterior_typewood 3.2842e+03 1.7401e+03 1.8874 0.0591170 .
exterior_featurescourtyard 4.5132e+04 1.5039e+04 3.0009 0.0026945 **
exterior_featuresfence -1.4935e+04 4.9711e+03 -3.0044 0.0026641 **
exterior_featuresnone -7.0656e+03 4.9869e+03 -1.4168 0.1565455
exterior_featuresporch -1.2793e+04 5.0376e+03 -2.5394 0.0111099 *
exterior_featurestennis_court 1.9681e+04 1.0734e+04 1.8335 0.0667358 .
fireplace1 1.2147e+04 8.0986e+02 14.9993 < 2.2e-16 ***
foundation_typeslab 1.3287e+04 1.2600e+03 10.5449 < 2.2e-16 ***
foundation_typeunspecified 7.2193e+03 1.4077e+03 5.1285 2.943e-07 ***
beds_total1 -2.4876e+04 2.7454e+04 -0.9061 0.3649013
beds_total2 -2.7848e+04 2.7266e+04 -1.0214 0.3070895
beds_total3 -2.5167e+04 2.7263e+04 -0.9231 0.3559582
beds_total4 -2.0523e+04 2.7292e+04 -0.7520 0.4520777
beds_total5 -3.6440e+04 2.7717e+04 -1.3147 0.1886165
bath_full1 -3.8425e+04 2.3952e+04 -1.6043 0.1086671
bath_full2 -1.3304e+04 2.3946e+04 -0.5556 0.5784882
bath_full3 6.7324e+03 2.4019e+04 0.2803 0.7792514
bath_full4 1.1122e+03 2.9704e+04 0.0374 0.9701332
bath_full6 -9.3526e+03 2.4547e+04 -0.3810 0.7031968
bath_half1 1.0957e+04 1.0886e+03 10.0654 < 2.2e-16 ***
bath_half2 3.0347e+04 6.6151e+03 4.5875 4.508e-06 ***
bath_half3 6.3102e+04 9.8354e+03 6.4158 1.427e-10 ***
bath_half4 8.8086e+04 3.1342e+03 28.1044 < 2.2e-16 ***
bath_half5 -5.2038e+04 2.5797e+04 -2.0172 0.0436870 *
dom -2.3526e+01 6.3618e+00 -3.6980 0.0002178 ***
sold_date 1.2018e+00 4.5403e-01 2.6470 0.0081272 **
sewer_typeseptic -6.1146e+03 1.4223e+03 -4.2992 1.721e-05 ***
sewer_typeunspecified -3.7371e+03 7.3964e+02 -5.0526 4.389e-07 ***
property_stylenot_mobile 6.9362e+04 1.7331e+03 40.0209 < 2.2e-16 ***
subdivision1 3.5289e+03 9.0359e+02 3.9054 9.432e-05 ***
water_typewell -5.1055e+02 3.9561e+03 -0.1291 0.8973140
waterfront1 2.0044e+04 1.4711e+03 13.6256 < 2.2e-16 ***
bottom25_dom1 1.0831e+04 9.7524e+02 11.1063 < 2.2e-16 ***
area_living_2 1.6847e-02 4.0061e-04 42.0532 < 2.2e-16 ***
data_factor$infections_3mma 8.6802e+00 6.9100e-01 12.5617 < 2.2e-16 ***
bottom25_age 2.5461e+04 9.3603e+02 27.2015 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_age 7.9891e-01 8.6329e-01 0.9254 0.3547531
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Price per unit of living area on Infections
# Horizontal violin + box plot of sold_price / area_living for each
# infections_period.
ggplot(data_factor, aes(x = infections_period, y = sold_price / area_living, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)) +
  # The plotted quantity is price divided by living area, not living area
  # itself, so the title now says so.
  ggtitle("Comparison of Price per Sqft. of Living Area") +
  xlab("Infection Period") +
  ylab("Price per Living Area") +
  # Single fill scale; the scale_fill_viridis() previously added before it
  # was replaced by this one and only triggered a message.
  scale_fill_manual(values = c(very_low, med)) +
  # NOTE(review): scale limits discard observations outside [0, 250] before
  # the violin/box statistics are computed (hence the "Removed ... rows"
  # warnings). If only zooming is intended, coord_flip(ylim = c(0, 250))
  # would keep all data in the statistics - confirm which is wanted.
  scale_y_continuous(limits = c(0, 250))
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
Warning: Removed 68 rows containing non-finite values (stat_ydensity).
Warning: Removed 68 rows containing non-finite values (stat_boxplot).
# Coefficient tests for lm_corona_area_living_top using an HC0
# heteroskedasticity-consistent covariance matrix.
# `method = "White2"` was dropped: assuming the model is an lm fit, sandwich's
# vcovHC() has no `method` argument (it belongs to the plm panel method) and
# silently ignored it, so the estimator is selected by `type` alone.
coeftest(lm_corona_area_living_top, vcov = vcovHC(lm_corona_area_living_top, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.1786e+05 3.1676e+04 6.8776 6.232e-12 ***
ac_typenone -4.5055e+04 1.9559e+03 -23.0347 < 2.2e-16 ***
ac_typenot_central -1.4868e+04 1.5495e+03 -9.5953 < 2.2e-16 ***
patio1 9.0080e+03 7.7344e+02 11.6466 < 2.2e-16 ***
school_general1 8.5012e+03 1.0371e+03 8.1972 2.580e-16 ***
photo_count 1.0514e+03 4.8373e+01 21.7341 < 2.2e-16 ***
pool1 1.1293e+04 1.3644e+03 8.2768 < 2.2e-16 ***
roof_typeother 4.4255e+03 1.4022e+03 3.1562 0.001601 **
roof_typeshingle 2.2060e+04 1.6174e+03 13.6392 < 2.2e-16 ***
roof_typeslate 7.5155e+03 8.9239e+03 0.8422 0.399699
gas_typenatural -1.0170e+05 3.5575e+03 -28.5866 < 2.2e-16 ***
gas_typenone -1.3662e+05 2.4951e+03 -54.7537 < 2.2e-16 ***
gas_typepropane -1.0682e+05 1.7037e+04 -6.2695 3.682e-10 ***
gas_typeunknown -1.3645e+05 2.3853e+03 -57.2035 < 2.2e-16 ***
out_building1 -4.6203e+03 8.2442e+02 -5.6043 2.114e-08 ***
land_acres 5.3460e+03 9.4296e+02 5.6694 1.449e-08 ***
appliances1 2.4211e+04 1.1233e+03 21.5532 < 2.2e-16 ***
garage1 1.4565e+04 7.6157e+02 19.1246 < 2.2e-16 ***
property_conditionnew -1.8401e+04 6.3472e+03 -2.8992 0.003745 **
property_conditionother -1.9911e+04 9.6101e+02 -20.7193 < 2.2e-16 ***
energy_efficient1 1.4472e+04 8.3430e+02 17.3469 < 2.2e-16 ***
exterior_typemetal -1.9967e+03 2.3583e+03 -0.8466 0.397202
exterior_typeother 1.1470e+04 1.0702e+03 10.7168 < 2.2e-16 ***
exterior_typevinyl 3.3346e+03 1.1187e+03 2.9807 0.002878 **
exterior_typewood 1.4095e+03 1.7583e+03 0.8016 0.422795
exterior_featurescourtyard 4.3545e+04 1.4388e+04 3.0266 0.002476 **
exterior_featuresfence -1.3817e+04 5.1863e+03 -2.6641 0.007724 **
exterior_featuresnone -6.5726e+03 5.2004e+03 -1.2639 0.206292
exterior_featuresporch -1.3384e+04 5.2569e+03 -2.5460 0.010902 *
exterior_featurestennis_court 2.0086e+04 1.1798e+04 1.7025 0.088672 .
fireplace1 1.9245e+04 7.9863e+02 24.0978 < 2.2e-16 ***
foundation_typeslab 1.3112e+04 1.2770e+03 10.2682 < 2.2e-16 ***
foundation_typeunspecified 7.1261e+03 1.4117e+03 5.0479 4.499e-07 ***
beds_total1 -2.4323e+04 2.8620e+04 -0.8499 0.395408
beds_total2 -2.0505e+04 2.8451e+04 -0.7207 0.471080
beds_total3 -8.6578e+03 2.8444e+04 -0.3044 0.760845
beds_total4 2.7001e+03 2.8461e+04 0.0949 0.924420
beds_total5 -4.1571e+03 2.8867e+04 -0.1440 0.885494
bath_full1 -6.0233e+04 2.5477e+04 -2.3642 0.018076 *
bath_full2 -2.2302e+04 2.5477e+04 -0.8754 0.381375
bath_full3 7.4722e+03 2.5549e+04 0.2925 0.769934
bath_full4 1.4402e+04 3.1333e+04 0.4596 0.645772
bath_full6 -2.8661e+04 2.6137e+04 -1.0965 0.272850
bath_half1 1.6583e+04 1.0888e+03 15.2302 < 2.2e-16 ***
bath_half2 4.0757e+04 6.9462e+03 5.8675 4.482e-09 ***
bath_half3 7.1768e+04 1.0544e+04 6.8065 1.023e-11 ***
bath_half4 7.0281e+04 3.5280e+03 19.9209 < 2.2e-16 ***
bath_half5 -4.1415e+04 4.2895e+04 -0.9655 0.334309
age -1.9822e+03 7.9428e+01 -24.9554 < 2.2e-16 ***
dom -1.5428e+01 6.4584e+00 -2.3888 0.016913 *
sold_date 6.1595e-01 4.7569e-01 1.2949 0.195377
sewer_typeseptic -6.0698e+03 1.4378e+03 -4.2215 2.436e-05 ***
sewer_typeunspecified -4.9915e+03 7.5983e+02 -6.5692 5.161e-11 ***
property_stylenot_mobile 7.3425e+04 1.7430e+03 42.1247 < 2.2e-16 ***
subdivision1 2.8893e+03 9.1343e+02 3.1631 0.001563 **
water_typewell 1.6295e+03 4.0353e+03 0.4038 0.686356
waterfront1 2.0708e+04 1.4983e+03 13.8212 < 2.2e-16 ***
bottom25_dom1 1.1119e+04 9.9811e+02 11.1405 < 2.2e-16 ***
age_2 1.8417e+01 1.0995e+00 16.7510 < 2.2e-16 ***
data_factor$infections_3mma 8.6770e+00 5.9698e-01 14.5349 < 2.2e-16 ***
top25_area_living 3.8537e+04 1.3446e+03 28.6604 < 2.2e-16 ***
data_factor$infections_3mma:top25_area_living 9.7205e-01 1.2004e+00 0.8098 0.418087
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Conditional Mean
# Mean days on market (dom) within each infections_period group; reused below
# for the per-period reference lines.
library(plyr)
dom_mean_data <- ddply(data_factor, "infections_period", summarise,
                       dom_mean = mean(dom, na.rm = TRUE))
# Distribution: Total
# Density of dom over all observations (no subsetting is applied here) with a
# dashed line at the overall mean. The mean is NA-safe (consistent with
# dom_mean_data) and passed as a constant so that a single line is drawn
# instead of one per data row.
ggplot(data_factor, aes(x = dom)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Days on Market Distribution") +
  geom_vline(xintercept = mean(data_factor$dom, na.rm = TRUE),
             linetype = "dashed", size = 0.4, alpha = 0.5, color = very_low) +
  xlab("Days on Market") +
  ylab("Density")
# Distribution: Infection
# Density of dom split by infections_period, with one dashed reference line
# per group taken from dom_mean_data.
# NOTE(review): rows 1 and 2 of dom_mean_data are presumed to be the "Pre"
# and "Post" groups, matching the order of the fill colours - confirm.
ggplot(data_factor, aes(x = dom, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Days on Market Distributions") +
  # Constant xintercepts draw each line exactly once; mapping them through
  # aes() with `data = dom_mean_data` drew every line once per row of that
  # table, overplotting it and distorting the requested alpha.
  geom_vline(xintercept = dom_mean_data[2, 2], linetype = "dashed",
             size = 0.5, color = med, alpha = 0.8) +
  geom_vline(xintercept = dom_mean_data[1, 2], linetype = "dashed",
             size = 0.5, alpha = 0.8, color = very_low) +
  xlab("Days on Market") +
  ylab("Density") +
  scale_fill_manual(values = c(very_low, med),
                    name = "Infection Period",
                    labels = c("Pre", "Post"))
# Distribution: Top vs. Bottom
# Days-on-market densities by infections_period, one facet row per
# combination of the top25_dom / bottom25_dom indicators, with free scales.
ggplot(data_factor, aes(x = dom, fill = infections_period)) +
  geom_density(position = "identity", alpha = 0.5) +
  facet_grid(rows = vars(top25_dom, bottom25_dom), scales = "free") +
  labs(title = "Days on Market Distributions",
       x = "Days on Market",
       y = "Density") +
  scale_fill_manual(name = "Infection Period",
                    labels = c("Pre", "Post"),
                    values = c(very_low, med))
# dom on Infections
# Violin + box plot of days on market (dom) for each infections_period.
ggplot(data_factor, aes(x = infections_period, y = dom, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  #coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)) +
  ggtitle("Comparison of Days on Market") +
  xlab("Infection Period") +
  ylab("Days on Market") +
  # Only the manual fill scale is kept: the scale_fill_viridis() that used to
  # precede it was replaced by this scale and only produced a
  # "Scale for 'fill' is already present" message.
  scale_fill_manual(values = c(very_low, med),
                    name = "Infection Period",
                    labels = c("Pre", "Post"))
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests for lm_corona_dom_bottom using an HC0
# heteroskedasticity-consistent covariance matrix.
# `method = "White2"` was dropped: assuming the model is an lm fit, sandwich's
# vcovHC() has no `method` argument (it belongs to the plm panel method) and
# silently ignored it, so the estimator is selected by `type` alone.
coeftest(lm_corona_dom_bottom, vcov = vcovHC(lm_corona_dom_bottom, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.7107e+05 3.2306e+04 5.2953 1.199e-07 ***
ac_typenone -4.3404e+04 1.9672e+03 -22.0638 < 2.2e-16 ***
ac_typenot_central -1.3244e+04 1.5285e+03 -8.6644 < 2.2e-16 ***
patio1 7.9510e+03 7.5105e+02 10.5866 < 2.2e-16 ***
school_general1 1.0340e+04 1.0042e+03 10.2968 < 2.2e-16 ***
photo_count 9.5789e+02 4.6938e+01 20.4074 < 2.2e-16 ***
pool1 9.9372e+03 1.3281e+03 7.4821 7.561e-14 ***
roof_typeother 2.6563e+03 1.4045e+03 1.8913 0.0585958 .
roof_typeshingle 1.9971e+04 1.6039e+03 12.4519 < 2.2e-16 ***
roof_typeslate 6.1511e+03 8.7277e+03 0.7048 0.4809591
gas_typenatural -1.0026e+05 3.4236e+03 -29.2838 < 2.2e-16 ***
gas_typenone -1.3330e+05 2.4175e+03 -55.1396 < 2.2e-16 ***
gas_typepropane -1.0165e+05 1.7735e+04 -5.7315 1.007e-08 ***
gas_typeunknown -1.3740e+05 2.3153e+03 -59.3429 < 2.2e-16 ***
out_building1 -5.0020e+03 8.0229e+02 -6.2347 4.601e-10 ***
area_living 4.3924e+01 6.0025e+00 7.3176 2.603e-13 ***
land_acres 3.1769e+03 9.3297e+02 3.4052 0.0006623 ***
appliances1 2.4716e+04 1.1047e+03 22.3735 < 2.2e-16 ***
garage1 1.2482e+04 7.4492e+02 16.7561 < 2.2e-16 ***
property_conditionnew -2.2693e+04 6.0403e+03 -3.7569 0.0001725 ***
property_conditionother -2.0434e+04 9.2182e+02 -22.1671 < 2.2e-16 ***
energy_efficient1 1.3977e+04 8.1089e+02 17.2373 < 2.2e-16 ***
exterior_typemetal 1.0138e+02 2.3235e+03 0.0436 0.9651991
exterior_typeother 1.0975e+04 1.0409e+03 10.5441 < 2.2e-16 ***
exterior_typevinyl 5.0221e+03 1.0863e+03 4.6229 3.803e-06 ***
exterior_typewood 2.6632e+03 1.7144e+03 1.5535 0.1203185
exterior_featurescourtyard 4.1749e+04 1.4307e+04 2.9182 0.0035241 **
exterior_featuresfence -1.5150e+04 4.9367e+03 -3.0688 0.0021514 **
exterior_featuresnone -8.7504e+03 4.9532e+03 -1.7666 0.0773066 .
exterior_featuresporch -1.5513e+04 5.0065e+03 -3.0985 0.0019472 **
exterior_featurestennis_court 1.8387e+04 1.0849e+04 1.6947 0.0901406 .
fireplace1 1.1927e+04 8.0873e+02 14.7472 < 2.2e-16 ***
foundation_typeslab 1.3986e+04 1.2569e+03 11.1274 < 2.2e-16 ***
foundation_typeunspecified 8.1005e+03 1.3966e+03 5.8003 6.702e-09 ***
beds_total1 -3.0210e+04 2.6492e+04 -1.1404 0.2541445
beds_total2 -3.8584e+04 2.6387e+04 -1.4622 0.1436894
beds_total3 -3.8581e+04 2.6423e+04 -1.4601 0.1442628
beds_total4 -3.4451e+04 2.6453e+04 -1.3024 0.1928085
beds_total5 -4.9987e+04 2.6861e+04 -1.8609 0.0627692 .
bath_full1 -3.1190e+04 2.3436e+04 -1.3308 0.1832546
bath_full2 -9.6994e+03 2.3424e+04 -0.4141 0.6788250
bath_full3 1.2111e+04 2.3511e+04 0.5151 0.6064690
bath_full4 8.9598e+03 2.9138e+04 0.3075 0.7584715
bath_full6 -1.3950e+04 2.4076e+04 -0.5794 0.5623287
bath_half1 1.1111e+04 1.0808e+03 10.2800 < 2.2e-16 ***
bath_half2 3.1065e+04 6.8469e+03 4.5371 5.730e-06 ***
bath_half3 5.8056e+04 1.1205e+04 5.1814 2.220e-07 ***
bath_half4 8.7758e+04 3.1894e+03 27.5153 < 2.2e-16 ***
bath_half5 -5.6301e+04 2.8378e+04 -1.9839 0.0472744 *
age -1.9126e+03 7.9668e+01 -24.0072 < 2.2e-16 ***
sold_date 2.6907e-01 4.6365e-01 0.5803 0.5616918
sewer_typeseptic -5.6527e+03 1.4179e+03 -3.9865 6.724e-05 ***
sewer_typeunspecified -4.2515e+03 7.3564e+02 -5.7793 7.592e-09 ***
property_stylenot_mobile 6.7594e+04 1.7413e+03 38.8190 < 2.2e-16 ***
subdivision1 3.5261e+03 8.9118e+02 3.9567 7.621e-05 ***
water_typewell 1.3123e+03 3.9007e+03 0.3364 0.7365576
waterfront1 1.9832e+04 1.4576e+03 13.6066 < 2.2e-16 ***
bottom25_dom1 1.3495e+04 9.6775e+02 13.9442 < 2.2e-16 ***
age_2 1.7151e+01 1.1195e+00 15.3204 < 2.2e-16 ***
area_living_2 5.3725e-03 1.7186e-03 3.1262 0.0017730 **
data_factor$infections_3mma 1.0086e+01 7.3559e-01 13.7116 < 2.2e-16 ***
bottom25_dom1:data_factor$infections_3mma -2.1852e+00 8.9042e-01 -2.4541 0.0141291 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Density of sold_price for the observations with city_limits == 1, with a
# dashed line at that subset's mean sold price.
# The reference line previously used mean(city_limits); city_limits is not
# numeric, so the mean was NA and every line was dropped with a warning.
# NOTE(review): the title says "Not in City Limits" while the filter keeps
# city_limits == 1 - confirm how city_limits is coded and align the two.
ggplot(data = subset(data_factor, city_limits == 1), aes(x = sold_price)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Price Distribution of Properties Not in City Limits") +
  geom_vline(xintercept = mean(data_factor$sold_price[data_factor$city_limits == 1],
                               na.rm = TRUE),
             linetype = "dashed", size = 0.4, alpha = 0.5) +
  xlab("Sold Price") +
  ylab("Density")
Warning in mean.default(city_limits) :
argument is not numeric or logical: returning NA
Warning: Removed 23399 rows containing missing values (geom_vline).
# Coefficient tests for lm_corona_city_rural using an HC0
# heteroskedasticity-consistent covariance matrix.
# `method = "White2"` was dropped: assuming the model is an lm fit, sandwich's
# vcovHC() has no `method` argument (it belongs to the plm panel method) and
# silently ignored it, so the estimator is selected by `type` alone.
coeftest(lm_corona_city_rural, vcov = vcovHC(lm_corona_city_rural, type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.4143e+05 4.8668e+04 -7.0155 4.345e-12 ***
property_typeOTH 5.9966e+04 2.0128e+04 2.9792 0.0029632 **
property_typePAT 5.0463e+04 2.0905e+04 2.4139 0.0159699 *
property_typeSGL 2.0141e+04 9.7828e+03 2.0588 0.0397884 *
property_typeTNH -2.8829e+04 1.6519e+04 -1.7452 0.0812644 .
ac_typenone -2.9466e+04 8.5567e+03 -3.4437 0.0005989 ***
ac_typenot_central -7.7233e+02 6.8323e+03 -0.1130 0.9100222
patio1 1.0794e+04 3.4160e+03 3.1598 0.0016286 **
school_general1 -1.1894e+03 3.9222e+03 -0.3032 0.7617664
photo_count 1.1512e+03 1.9343e+02 5.9512 3.738e-09 ***
pool1 1.6295e+04 3.9838e+03 4.0904 4.671e-05 ***
roof_typeother 1.0670e+04 8.1028e+03 1.3168 0.1882135
roof_typeshingle 1.5286e+04 8.0765e+03 1.8926 0.0587103 .
gas_typenone 7.8179e+02 4.2828e+03 0.1825 0.8551965
gas_typepropane 3.6942e+04 1.1281e+04 3.2746 0.0010963 **
gas_typeunknown 1.9356e+04 5.9092e+03 3.2756 0.0010925 **
out_building1 -1.3561e+03 3.4266e+03 -0.3958 0.6923739
area_living 1.0072e+02 2.6548e+01 3.7941 0.0001575 ***
land_acres 8.5374e+03 4.0315e+03 2.1177 0.0344615 *
appliances1 2.3567e+04 4.2778e+03 5.5091 4.642e-08 ***
garage1 7.3142e+03 3.2891e+03 2.2237 0.0263995 *
property_conditionnew -3.5255e+03 6.5852e+03 -0.5354 0.5925169
property_conditionother -2.7082e+04 5.3956e+03 -5.0192 6.192e-07 ***
energy_efficient1 -5.8615e+03 4.4258e+03 -1.3244 0.1856881
exterior_typemetal -9.9055e+03 1.0796e+04 -0.9175 0.3591104
exterior_typeother 6.1910e+03 7.3193e+03 0.8459 0.3978484
exterior_typevinyl -7.3989e+03 4.5282e+03 -1.6340 0.1025977
exterior_typewood -2.5660e+04 7.9077e+03 -3.2450 0.0012155 **
exterior_featurescourtyard -4.8332e+04 3.4246e+04 -1.4113 0.1584756
exterior_featuresfence -2.4676e+04 1.1476e+04 -2.1502 0.0317882 *
exterior_featuresnone -1.9079e+04 1.1533e+04 -1.6542 0.0984079 .
exterior_featuresporch -1.1031e+04 1.1667e+04 -0.9455 0.3446550
exterior_featurestennis_court 2.7145e+04 1.4919e+04 1.8195 0.0691539 .
fireplace1 1.0197e+04 3.2214e+03 3.1654 0.0015976 **
foundation_typeslab -5.2275e+02 7.6668e+03 -0.0682 0.9456542
foundation_typeunspecified -1.4610e+04 1.4393e+04 -1.0151 0.3102970
beds_total1 -5.5553e+04 1.3747e+04 -4.0409 5.753e-05 ***
beds_total2 -5.1334e+04 1.5556e+04 -3.3000 0.0010029 **
beds_total3 -5.9662e+04 1.7067e+04 -3.4958 0.0004945 ***
beds_total4 -6.5707e+04 1.7882e+04 -3.6745 0.0002516 ***
beds_total5 -4.6911e+04 2.5386e+04 -1.8479 0.0649205 .
bath_full1 -9.0671e+04 1.7381e+04 -5.2166 2.236e-07 ***
bath_full2 -7.8715e+04 1.5496e+04 -5.0798 4.546e-07 ***
bath_full3 -4.1378e+04 1.6713e+04 -2.4757 0.0134693 *
bath_full4 4.2342e+04 2.5150e+04 1.6836 0.0925961 .
bath_half1 -3.8157e+03 4.8428e+03 -0.7879 0.4309485
bath_half2 3.4910e+04 1.9257e+04 1.8129 0.0701697 .
bath_half5 -3.2351e+04 1.5350e+04 -2.1076 0.0353307 *
age -1.1447e+03 3.2894e+02 -3.4801 0.0005240 ***
dom 2.4069e+01 5.1787e+01 0.4648 0.6422029
sold_date 2.2581e+01 2.1361e+00 10.5710 < 2.2e-16 ***
sewer_typeseptic 1.6363e+03 5.3982e+03 0.3031 0.7618687
sewer_typeunspecified -7.1618e+03 2.9622e+03 -2.4177 0.0158061 *
property_stylenot_mobile 7.7125e+04 1.7228e+04 4.4766 8.500e-06 ***
subdivision1 5.0676e+03 3.5932e+03 1.4103 0.1587639
water_typewell 2.0304e+03 5.7153e+03 0.3553 0.7224772
waterfront1 2.0067e+04 4.9263e+03 4.0735 5.017e-05 ***
bottom25_dom1 6.9918e+02 3.9669e+03 0.1763 0.8601307
age_2 9.7360e+00 5.3031e+00 1.8359 0.0666857 .
area_living_2 -4.0151e-03 7.2064e-03 -0.5572 0.5775524
infections_3mma 2.0551e+00 1.7745e+00 1.1581 0.2471028
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# packages
# Dependencies are attached with library() so that a missing package stops
# the script immediately (require() only returns FALSE). Package installation
# is a one-off setup step and is no longer triggered on every run.
library(ggplot2)
library(maps)
library(ggmap) # provides geocode(); previously installed but never attached
# NOTE(review): the original also ran install.packages(Geoc) with an unquoted,
# undefined symbol, which errors; the intended package name could not be
# determined from this file, so the call was removed.
# NOTE(review): geocode() presumably needs a geocoding API key to be
# registered, and coord_map() presumably needs the mapproj package - confirm
# both are set up in the environment where this runs.

# Basic Map
LA <- map_data("state", region = "louisiana")
ggplot(LA, aes(x = long, y = lat)) +
  geom_polygon()

# data
# Example table of five Louisiana cities with a Calls count each.
salesCalls <- data.frame(State = rep("louisiana", 5),
                         City = c("Baton Rouge", "New Orleans", "Shreveport", "Lafayette", "Mandeville"),
                         Calls = c(10, 5, 8, 13, 2))
# Prepend the geocode() result (used below as lon/lat) to the table.
salesCalls <- cbind(geocode(as.character(salesCalls$City)), salesCalls)

# State outline with one point per city, sized by Calls.
ggplot(LA, aes(x = long, y = lat)) +
  geom_polygon() +
  coord_map() +
  geom_point(data = salesCalls, aes(x = lon, y = lat, size = Calls), color = "orange")
library(boot) # K-fold
library(leaps) # Subset
library(glmnet) # glmnet() needs an x matrix and a y vector rather than a formula

# Set x-y definitions for glmnet package
# Both are derived from one model frame. model.matrix() drops rows with
# missing values, which is why x had fewer rows than the raw sold_price
# column; truncating y to the first 24653 values only matched the lengths and
# paired prices with the wrong rows. model.response() returns the response
# for exactly the rows kept in x.
lasso_frame <- model.frame(sold_price ~ ., data = data_factor_core_clean)
x <- model.matrix(sold_price ~ ., data = lasso_frame)[, -1] # drop intercept column
y <- model.response(lasso_frame)
stopifnot(nrow(x) == length(y))

# General grid
# 101 lambda values, log-spaced from exp(10) down to exp(-65).
grid <- exp(seq(10, -65, length.out = 101))

# Lasso
set.seed(1)
cv.out <- cv.glmnet(x, y, alpha = 1, lambda = grid, nfolds = 10) # alpha = 1 -> lasso
plot(cv.out)

# Base decision: lambda with the minimum cross-validated error
bestlam <- cv.out$lambda.min; bestlam; log(bestlam)
out <- cv.out$glmnet.fit
lasso.coef <- predict(out, type = "coefficients", s = bestlam); lasso.coef; lasso.coef[lasso.coef != 0]
# l1 norm over every coefficient except the intercept (row 1). The previous
# code summed a hard-coded 1:31 here (intercept included) but 2:31 below.
# NOTE(review): confirm the hard-coded 31 was not a deliberate subset.
sum(abs(lasso.coef[-1, 1]))

# +1se decision: largest lambda within one standard error of the minimum
bestlam2 <- cv.out$lambda.1se; bestlam2; log(bestlam2)
lasso.coef2 <- predict(out, type = "coefficients", s = bestlam2); lasso.coef2; lasso.coef2[lasso.coef2 != 0]
sum(abs(lasso.coef2[-1, 1])) # l1 norm, intercept excluded
# 2-D kernel density estimate of sold_price against infections_3mma on a
# 50 x 50 grid, shown as an interactive surface.
# The estimate is now computed on data_factor: the previous call evaluated
# these columns inside MASS::geyser, which does not contain them. Rows with a
# missing value in either column are dropped before the estimate.
kd_input <- na.omit(data_factor[, c("sold_price", "infections_3mma")])
kd <- with(kd_input, MASS::kde2d(sold_price, infections_3mma, n = 50))
# NOTE(review): kde2d() returns z indexed as [x, y]; check whether
# add_surface() expects the transpose so the axes are not swapped.
fig <- plot_ly(x = kd$x, y = kd$y, z = kd$z) %>% add_surface()
fig
# Correlation Matrix heatmap
# Get numeric variables
# bath_full is converted so that it is picked up as numeric. The original
# line used `<` (a comparison whose result was discarded) instead of `<-`.
# The conversion goes through as.character() because as.numeric() applied
# directly to a factor returns the internal level codes, not the labels.
data_factor$bath_full <- as.numeric(as.character(data_factor$bath_full))
num_vars <- data_factor %>% dplyr::select(where(is.numeric))
num_vars <- subset(num_vars, select = -c(top50_sold_price))

# Corr matrix
# Pairwise-complete observations keep a single NA in one column from turning
# every correlation involving that column into NA.
cormat <- round(cor(num_vars, use = "pairwise.complete.obs"), 2)
head(cormat)
# Long format (Var1, Var2, value) as required by geom_tile().
melted_cormat <- melt(cormat)
head(melted_cormat)

ggplot(data = melted_cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  # Diverging palette centred on zero correlation.
  scale_fill_gradient2(low = very_low,
                       high = high,
                       mid = med,
                       midpoint = 0,
                       limit = c(-1, 1),
                       space = "Lab",
                       name = "Correlation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1, color = "#2E2E2E"),
        axis.text.y = element_text(angle = 0, vjust = 1, size = 10, hjust = 1, color = "#2E2E2E")) +
  coord_fixed() + # square tiles
  labs(title = "Correlation Matrix",
       x = "",
       y = "")
# Distribution: Total
# Twelve single-variable density panels arranged in a 4 x 3 grid.

# Build one density panel: filled density of the variable mapped in `mapping`,
# labelled `x_label` on the x axis, with the y axis text and ticks hidden.
density_panel <- function(data, mapping, x_label) {
  ggplot(data, mapping) +
    geom_density(alpha = 0.5, position = "identity", fill = very_low) +
    xlab(x_label) +
    ylab("") +
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          text = element_text(size = 10))
}

# Convert a factor (or numeric) vector to the numbers its labels spell out.
# as.numeric() applied directly to a factor returns the internal level codes,
# which shifts the counts (e.g. a level "0" becomes 1).
factor_to_number <- function(x) {
  as.numeric(as.character(x))
}

# The panel objects keep their original single-letter names.
a <- density_panel(data_factor, aes(x = sold_price / 1000), "Sold Price")
b <- density_panel(data_factor, aes(x = list_price / 1000), "List Price")
c <- density_panel(data_factor, aes(x = area_living), "Living Area")
d <- density_panel(data_factor, aes(x = land_acres), "Land in Acres")
e <- density_panel(data_factor, aes(x = area_total), "Total Area")
f <- density_panel(data_factor, aes(x = age), "Age")
g <- density_panel(data_factor, aes(x = dom), "DOM")

# sold_date is stored as Date from here on so that scale_x_date() applies.
data_factor$sold_date <- as.Date(data_factor$sold_date)
str(data_factor)
h <- density_panel(data_factor, aes(x = sold_date), "Sold Date") +
  scale_x_date(date_labels = "%Y")

# Only rows with infections_daily > 1 are shown in this panel.
i <- density_panel(subset(data_factor, infections_daily > 1),
                   aes(x = infections_daily), "Infections Daily")

# Room counts are stored as numbers in data_factor from here on. The no-op
# scale_fill_manual() calls were dropped: no fill aesthetic is mapped.
data_factor$beds_total <- factor_to_number(data_factor$beds_total)
j <- density_panel(data_factor, aes(x = beds_total), "Number of Bedrooms")

data_factor$bath_full <- factor_to_number(data_factor$bath_full)
k <- density_panel(data_factor, aes(x = bath_full), "Number of Full Bathrooms")

data_factor$bath_half <- factor_to_number(data_factor$bath_half)
l <- density_panel(data_factor, aes(x = bath_half), "Number of Half Bathrooms")

gridExtra::grid.arrange(a, b, c, d, e, f, g, h, i, j, k, l, nrow = 4, ncol = 3)
# OLS of sold_price on pool, infections_period and their interaction.
# `pool * infections_period` expands to both main effects plus the
# pool:infections_period term, i.e. the same model as spelling them out.
lm_ucla <- lm(sold_price ~ pool * infections_period, data = data_factor)
summ(lm_ucla)
# load package
library(sjPlot)
library(sjmisc)
library(sjlabelled)
# Formatted regression table for the fitted model.
tab_model(lm_ucla)
end of document